################################################################################
# Code for replicating "Conflicts over Disarmament and International Security:
# Analyzing Speeches in the UN General Assembly"
# by D. Finke and T. Risse. Contact: finke@ps.au.dk
# The code is organized in the following parts:
# PART 1: Topic Modelling
# PART 2: Reshaping Data
# PART 3: Wordfish Simulations (computationally very intensive!)
# PART 4: Principal Component Analysis
# PART 5: Replication of Figures
# PART 6: Replication of Correlation Analyses
# PART 7: Replication of Regression Analyses
# FYI: The package includes the outputs of steps 1 - 3. Hence it is possible to
# replicate the regression analysis without having to run all simulations.
################################################################################
sink("output.txt")
# Libraries
library(countrycode)
library(data.table)
library(dplyr)
library(foreign)
library(furrr)
library(ggplot2)
library(ggrepel)
library(GPArotation)
library(lme4)
library(lmerTest)
library(lubridate)
library(magrittr)
library(openxlsx)
library(pacman)
library(psych)
library(purrr)
library(quanteda)
library(rlist)
library(RSQLite)
library(rvest)
library(shiny)
library(shinyBS)
library(SnowballC)
library(stm)
library(stminsights)
library(stringr)
library(tictoc)
library(tidytext)
library(tidyr)
library(tm)
library(wordcloud)

################################################################################
# PART 1: Topic Modelling ## Input: Ctt. 1 Speeches "speeches_ctt1.Rdata"
# Output: STM Model "modelctt1_88.rds"
################################################################################

load("speeches_ctt1.RData")

# append stemmed version of vector
countryattrs <- read.csv("countrystemmed.csv")

# Topic Model preparation 
processed_verbatims <- textProcessor(
  speeches$text, 
  metadata = tibble(speeches),
  lowercase = TRUE,
  removestopwords = TRUE,
  customstopwords = countryattrs$names,
  removenumbers = TRUE,
  removepunctuation = TRUE,
  stem = TRUE,
  onlycharacter = TRUE,
  wordLengths = c(3, Inf),
  language = "en",
  verbose = TRUE)

lower.thresh = 60

prepared_verbatims <- prepDocuments(processed_verbatims$documents,
                                    processed_verbatims$vocab,
                                    processed_verbatims$meta,
                                    lower.thresh = lower.thresh)

model <- stm::stm(documents = prepared_verbatims$documents,
                  vocab = prepared_verbatims$vocab,
                  data = prepared_verbatims$meta,
                  K = 8, # change number to K = 5 for model with 5 topics
                  prevalence = ~ as.factor(prepared_verbatims$meta$ccode) +
                    s(prepared_verbatims$meta$year))

saveRDS(model, "modelctt1_88.rds")

################################################################################
# PART 2: Reshaping data for use in subsequent Wordfish simulations
# Output: Document Frequency Matrix dfmk88.Rdata
################################################################################

# Convert to Document Term Matrix
meta <- prepared_verbatims$meta
vocab <- prepared_verbatims$vocab

dtm_list <- vector("list", length = length(prepared_verbatims$documents))
for(i in seq_along(prepared_verbatims$documents)){
  words_num <- prepared_verbatims$documents[[i]][1, ]  
  words_char <- words_num %>% as.character()
  for(j in seq_along(words_num)){
    words_char[j] <- vocab[words_num[j]]
  }
  frequency <- prepared_verbatims$documents[[i]][2, ]
  dtm_list[[i]] <- data.frame(words_char, words_num, frequency)
  cat("."); if(i %% 100 == 0) cat("\n", i)
}

dtm_list2 <- data.frame(words_char = prepared_verbatims$vocab)
dtmy <- data.frame(words_char = prepared_verbatims$vocab)
for(i in 1:length(prepared_verbatims$documents)){
  dtmx <- dtm_list[[i]]
  dtmk <- left_join(dtmy, dtmx)
  dtmk <- subset(dtmk, select = -c(words_num, words_char))
  dtm_list2 <- data.frame(dtm_list2, dtmk) 
}

dtm <-subset(dtm_list2, select = -c(words_char))
rm(dtmk)
rm(dtmx)
rm(dtmy)

# transpose data.frame
dtm <- data.table::transpose(dtm) %>% as.data.frame()
rownames(dtm) <- paste0("text", 1:nrow(dtm))
colnames(dtm) <- vocab

temp <- data.frame(key = rownames(dtm),
                   document = rownames(dtm),
                   dtm)
temp <- gather(temp, key, count, colnames(temp)[3]:colnames(temp)[ncol(temp)])
temp$count <- ifelse(is.na(temp$count), 0, temp$count)
colnames(temp)[2] <- "term"

# cast
dfm <- tidytext::cast_dfm(data = temp, term = term, document = document,
                          value = count)

# add information on year and country, generate ID
docvars(dfm, field = "year") <- prepared_verbatims$meta$year
docvars(dfm, field = "ccode") <- prepared_verbatims$meta$ccode

gig <- data.frame(year = dfm@docvars$year, ccode = dfm@docvars$ccode)
gig2 <- gig %>%                                    
  group_by(ccode, year) %>%
  dplyr::mutate(ID = cur_group_id())
docvars(dfm, field = "ID") <- gig2$ID

save(dfm, file = "dfmK88.RData")

################################################################################
# PART 3: Wordfish simulations
# Input:dfmk88.Rdata ; modelctt1_88.rds
# Output:
# 1. pos_top'x'_words.dta (8 files for each topic x)
# 2. feat_top'x'_words.dta (8 files for each topic x)
# OBS !: This step is very computationally intensive.
# To speed it up, we are sending 8 files "simulations_topic'x'.R" which can be
# run in parallel by starting them in separate R consoles.
# Depending on your machine it might still take several hours.  
################################################################################

source("simulations_topic1.R")
source("simulations_topic2.R")
source("simulations_topic3.R")
source("simulations_topic4.R")
source("simulations_topic5.R")
source("simulations_topic6.R") # only for k=8
source("simulations_topic7.R") # only for k=8
source("simulations_topic8.R") # only for k=8

################################################################################
# PART 4: Principal Component Analysis 
# to extract Latent Positions and Latent Features in 2D Space
# Output: "allpositions" with PC1, PC2 indicating latent positions.
# "allfeatures" with RF1, RF2 and AEXC1, AEXC2 (aggregated exclusvity score)
################################################################################

file1 <- read.dta("pos_top1_words.dta")
file2 <- read.dta("pos_top2_words.dta")
file3 <- read.dta("pos_top3_words.dta")
file4 <- read.dta("pos_top4_words.dta")
file5 <- read.dta("pos_top5_words.dta")
file6 <- read.dta("pos_top6_words.dta") # only for k=8
file7 <- read.dta("pos_top7_words.dta") # only for k=8
file8 <- read.dta("pos_top8_words.dta") # only for k=8

file1 <- file1 %>% distinct()
file2 <- file2 %>% distinct()
file3 <- file3 %>% distinct()
file4 <- file4 %>% distinct()
file5 <- file5 %>% distinct()
file6 <- file6 %>% distinct() # only for k=8
file7 <- file7 %>% distinct() # only for k=8
file8 <- file8 %>% distinct() # only for k=8

allpositions <- list(file1, file2, file3, file4, file5,
                     file6, file7, file8) # only for k=8
allpositions <- allpositions %>% reduce(full_join, by = c("ccode", "year"))

file1 <- read.dta("feat_top1_words.dta")
file2 <- read.dta("feat_top2_words.dta")
file3 <- read.dta("feat_top3_words.dta")
file4 <- read.dta("feat_top4_words.dta")
file5 <- read.dta("feat_top5_words.dta")
file6 <- read.dta("feat_top6_words.dta") # only for k=8
file7 <- read.dta("feat_top7_words.dta") # only for k=8
file8 <- read.dta("feat_top8_words.dta") # only for k=8

allfeatures <- list(file1, file2, file3, file4, file5,
                    file6, file7, file8) # only for k=8
allfeatures <- allfeatures %>% reduce(full_join, by = "features")

rm(file1, file2, file3, file4, file5,
   file6, file7, file8) # only for k=8

select_pos <- allpositions[, c("thet_top1", "thet_top2", "thet_top3",
                               "thet_top4","thet_top5",
                               "thet_top6", "thet_top7", # only for k=8
                               "thet_top8" )] # only for k=8

pca_result <- prcomp(select_pos, scale. = TRUE)

loadings <- pca_result$rotation[, 1:2]

varimax_result <- varimax(loadings)
rotated_loadings <- varimax_result$loadings

rotated_loadings_matrix <- unclass(rotated_loadings)

rotated_loadings_df <- as.data.frame(rotated_loadings_matrix)

comp1 <- as.matrix(rotated_loadings_df[1:8, 1]) # 1:5 for k=5
comp2 <- as.matrix(rotated_loadings_df[1:8, 2]) # 1:5 for k=5

rotation_matrix <- varimax_result$rotmat 
rotated_scores <- pca_result$x[, 1:2] %*% rotation_matrix 
rotated_scores_df <- as.data.frame(rotated_scores)
colnames(rotated_scores_df) <- c("PC1", "PC2")
rotated_scores_df$PC2 <- rotated_scores_df$PC2 * (-1)
  
allpositions <- cbind(allpositions, rotated_scores_df)

# Calculate scores for exclusivity scores
model <- readRDS("modelctt1_88.rds")
FREQSCORE <- apply(model$beta$logbeta[[1]], 1,
                   data.table::frank) / ncol(model$beta$logbeta[[1]])
EXCLSCORE <- apply(t(t(model$beta$logbeta[[1]]) -
                       matrixStats::colLogSumExps((model$beta$logbeta[[1]]))),
                   1, data.table::frank) / ncol(model$beta$logbeta[[1]])
FREXSCORE <- 1 / (0.5 / FREQSCORE + (1 - 0.5) / EXCLSCORE)
rownames(FREXSCORE) <- rownames(FREQSCORE) <- rownames(EXCLSCORE)
FREXSCORE <- as.data.frame(FREXSCORE)
colnames(FREXSCORE) <- paste(colnames(FREXSCORE), "_1")
measurement <- as.data.frame(EXCLSCORE)

select_feat <- allfeatures[, c("bet_top1", "bet_top2", "bet_top3",
                               "bet_top4","bet_top5",
                               "bet_top6", "bet_top7", # only for k=8
                               "bet_top8" )] # only for k=8

w_df1 <- select_feat
for(i in 1:ncol(w_df1)){
  w_df1[, i] <- w_df1[, i] * comp1[i, ]
}
RF1 <- rowSums(w_df1)
w_df2 <- select_feat
for(i in 1:ncol(w_df2)){
  w_df2[, i] <- w_df2[, i] * comp2[i, ]
}
RF2 <- rowSums(w_df2)
RF2 <- RF2 * (-1)

w_df1 <- measurement
for(i in 1:ncol(w_df1)){
  w_df1[, i] <- w_df1[, i] * abs(comp1[i, ])
}
AEXC1 <- rowSums(w_df1)
w_df2 <- measurement
for(i in 1:ncol(w_df2)){
  w_df2[, i] <- w_df2[, i] * abs(comp2[i, ])
}
AEXC2 <- rowSums(w_df2)

allfeatures <- cbind(allfeatures, measurement, FREXSCORE,
                     RF1, RF2, AEXC1, AEXC2)

write.dta(allpositions, "allpositions.dta")
write.dta(allfeatures, "allfeatures.dta")

################################################################################
#PART 5: Figures
################################################################################

allfeatures <- read.dta("allfeatures.dta")
allpositions <- read.dta("allpositions.dta")
thirdvariables <- read.xlsx("thirdvariables.xlsx")

allpositions_agg <- allpositions %>% group_by(ccode) %>%
  summarize(thet_top1 = mean(thet_top1, na.rm = T),
            thet_top2 = mean(thet_top2, na.rm = T),
            thet_top3 = mean(thet_top3, na.rm = T),
            thet_top4 = mean(thet_top4, na.rm = T),
            thet_top5 = mean(thet_top5, na.rm = T),
            thet_top6 = mean(thet_top6, na.rm = T),
            thet_top7 = mean(thet_top7, na.rm = T),
            thet_top8 = mean(thet_top8, na.rm = T),
            PC1 = mean(PC1, na.rm = T), PC2 = mean(PC2, na.rm = T),
            nyears = n())
thirdvariables_agg <- thirdvariables %>% group_by(ccode) %>%
  summarize(country = paste(unique(country)),
            countryabb = paste(unique(countryabb)),
            bailey_all = mean(bailey_all, na.rm = T),
            bailey_nuc = mean(bailey_nuc, na.rm = T),
            risse_all = mean(risse_all, na.rm = T),
            risse_factor1 = mean(risse_factor1, na.rm = T),
            risse_factor2 = mean(risse_factor2, na.rm = T),
            barnumlo = mean(barnumlo, na.rm = T),
            efrat = mean(efrat, na.rm = T))

allpositions <- merge(allpositions, thirdvariables, by = c("ccode", "year"),
                      all.x = T)
allpositions_agg <- merge(allpositions_agg, thirdvariables_agg, by = "ccode",
                          all.x = T)

# Figure 2: Rotated Betas and Aggregated Exclusivity (Dimension 1)
f2 <- ggplot(allfeatures, aes(RF1, AEXC1)) +
  guides(color = "none") +
  xlab("Rotated Betas") +
  ylab("Aggregated Exclusivity") + 
  theme(axis.line.x = element_line(arrow = arrow()),
        axis.line.y = element_line(arrow = arrow()),
        panel.background = element_rect(fill = "white")) +
  geom_text(aes(label = features), color = "dimgray", size = 2, 
            vjust = 0.5, hjust = 0.5, alpha = 0.3) +
  geom_text_repel(data = allfeatures[allfeatures$features == "sahel"
                                     | allfeatures$features == "african"
                                     | allfeatures$features == "caribbean"
                                     | allfeatures$features == "mercosur"
                                     | allfeatures$features == "violenc"
                                     | allfeatures$features == "displac"
                                     | allfeatures$features == "existenti"
                                     | allfeatures$features == "contamin"
                                     | allfeatures$features == "unregul"
                                     | allfeatures$features == "assist"
                                     | allfeatures$features == "donor"
                                     | allfeatures$features == "att"
                                     | allfeatures$features == "paragraph"
                                     | allfeatures$features == "vote"
                                     | allfeatures$features == "comment"
                                     | allfeatures$features == "organiz"
                                     | allfeatures$features == "abstain"
                                     | allfeatures$features == "reserv"
                                     | allfeatures$features == "realist"
                                     | allfeatures$features == "defens"
                                     | allfeatures$features == "aggress"
                                     | allfeatures$features == "strike"
                                     | allfeatures$features == "missil"
                                     | allfeatures$features == "plutonium", ],
                  aes(label = features, color = "black"), size = 4) +
  scale_color_manual(values = c("black" = "black", "dimgray" = "dimgray")) +
  coord_cartesian(xlim = c(-3, 3))

png("figure2.png", res = 250, width = 2200, height = 1200)
f2
dev.off()

# Figure 3: Rotated Betas and Aggregated Exclusivity (Dimension 2)
f3 <- ggplot(allfeatures, aes(RF2, AEXC2)) +
  guides(color = "none") +
  xlab("Rotated Betas") +
  ylab("Aggregated Exclusivity") + 
  theme(axis.line.x = element_line(arrow = arrow()),
        axis.line.y = element_line(arrow = arrow()),
        panel.background = element_rect(fill = "white")) +
  geom_text(aes(label = features), color = "dimgray", size = 2, 
            vjust = 0.5, hjust = 0.5, alpha = 0.3) +
  geom_text_repel(data = allfeatures[allfeatures$features == "abm"
                                     | allfeatures$features == "fmct"
                                     | allfeatures$features == "start"
                                     | allfeatures$features == "bwc"
                                     | allfeatures$features == "enlarg"
                                     | allfeatures$features == "underpin"
                                     | allfeatures$features == "reinforc"
                                     | allfeatures$features == "deepen"
                                     | allfeatures$features == "steadili"
                                     | allfeatures$features == "ration"
                                     | allfeatures$features == "realist"
                                     | allfeatures$features == "pragmat"
                                     | allfeatures$features == "reject"
                                     | allfeatures$features == "failur"
                                     | allfeatures$features == "weak"
                                     | allfeatures$features == "doubl"
                                     | allfeatures$features == "discrimin"
                                     | allfeatures$features == "imbal"
                                     | allfeatures$features == "inalien"
                                     | allfeatures$features == "instal"
                                     | allfeatures$features == "energi"
                                     | allfeatures$features == "baseless"
                                     | allfeatures$features == "unjustifi"
                                     | allfeatures$features == "innoc", ],
                  aes(label = features, color = "black"), size = 4) +
  scale_color_manual(values = c("black" = "black", "dimgray" = "dimgray")) +
  coord_cartesian(xlim = c(-1.5, 1.5))

png("figure3.png", res = 250, width = 2200, height = 1200)
f3
dev.off()

# Figure 4: Country Positions on Both Dimensions
f4 <- ggplot(allpositions_agg[allpositions_agg$nyears > 2, ],
             aes(x = PC1, y = PC2, label = countryabb)) +
  theme_bw() +
  labs(x = "Security                                    Factor 1: Conventional Weapons Dimension                                    Humanitarian",
       y = "Status Quo                              Factor 2: WMD Dimension                              Challenging") +
  geom_text(aes(label = countryabb), hjust = 0, vjust = 0, colour = "gray") +
  geom_text(aes(label = ifelse(country == "United States of America"
                               | country == "European Union"
                               | country == "Democratic Republic of the Congo"
                               | country == "Honduras"
                               | country == "Cambodia"
                               | country == "Turkey"
                               | country == "Brazil"
                               | country == "North Korea"
                               | country == "Russia"
                               | country == "South Africa"
                               | country == "Iraq"
                               | country == "Sudan"
                               | country == "China"
                               | country == "Iran"
                               | country == "Denmark"
                               | country == "Syria",
                               countryabb, "")),
            hjust = 0, vjust = 0) +
  coord_cartesian(xlim = c(-4, 4)) +
  scale_x_continuous(breaks = c(-4, -3, -2, -1, 0, 1, 2, 3, 4))

png("figure4.png", res = 600, width = 5000, height = 4000)
f4
dev.off()

# Figure 5: Development of Positions (Dimension 1)
f5 <- ggplot(data = allpositions) +
  geom_smooth(data = allpositions,
              aes(x = year, y = PC1,
                  color = "Average", linetype = "Average"),
              method = "loess", se = FALSE, linewidth = 1.5, span = 0.4) +
  geom_smooth(data = subset(allpositions,
                            country == "United States of America"),
              aes(x = year, y = PC1,
                  color = "USA", linetype = "USA"),
              method = "loess", se = FALSE, linewidth = 1.5, span = 0.4) +
  geom_smooth(data = subset(allpositions, country == "Ukraine"),
              aes(x = year, y = PC1,
                  color = "Ukraine", linetype = "Ukraine"),
              method = "loess", se = FALSE, linewidth = 1.5, span = 0.4) +
  geom_smooth(data = subset(allpositions, country == "Colombia"),
              aes(x = year, y = PC1,
                  color = "Colombia", linetype = "Colombia"),
              method = "loess", se = FALSE, linewidth = 1.5, span = 0.4) +
  geom_smooth(data = subset(allpositions, country == "Russia"),
              aes(x = year, y = PC1,
                  color = "Russia", linetype = "Russia"),
              method = "loess", se = FALSE, linewidth = 1.5, span = 0.4) +
  scale_color_manual(name = "Countries",
                     values = c("Average" = "black", "USA" = "gray27",
                                "Russia" = "gray47",
                                "Colombia" = "gray67",
                                "Ukraine" = "gray87")) +
  scale_linetype_manual(name = "Countries",
                        values = c("Average" = "solid", "USA" = "dashed",
                                   "Russia" = "longdash",
                                   "Colombia" = "dotdash",
                                   "Ukraine" = "dotted")) +
  scale_size_manual(values = 1.5) +
  theme_bw() +
  theme(legend.position = "bottom", legend.key.width = unit(2, "cm")) +
  labs(x = "Year", y = "Factor 1: Conventional Weapons Dimension") +
  coord_cartesian(xlim = c(1993, 2019), ylim = c(-5.5, 3)) +
  scale_x_continuous(breaks = c(1995, 2000, 2005, 2010, 2015))

png("figure5.png", res = 600, width = 5000, height = 3500)
f5
dev.off()

# Figure 6: Development of Positions (Dimension 2)
f6 <- ggplot(data = allpositions) +
  geom_smooth(data = allpositions,
              aes(x = year, y = PC2,
                  color = "Average", linetype = "Average"),
              method = "loess", se = FALSE, linewidth = 1.5, span = 0.4) +
  geom_smooth(data = subset(allpositions,
                            country == "United States of America"),
              aes(x = year, y = PC2,
                  color = "USA", linetype = "USA"),
              method = "loess", se = FALSE, linewidth = 1.5, span = 0.4) +
  geom_smooth(data = subset(allpositions, country == "China"),
              aes(x = year, y = PC2,
                  color = "China", linetype = "China"),
              method = "loess", se = FALSE, linewidth = 1.5, span = 0.4) +
  geom_smooth(data = subset(allpositions, country == "Iran"),
              aes(x = year, y = PC2,
                  color = "Iran", linetype = "Iran"),
              method = "loess", se = FALSE, linewidth = 1.5, span = 0.4) +
  scale_color_manual(name = "Countries",
                     values = c("Average" = "black", "USA" = "gray30",
                                "China" = "gray80",
                                "Iran" = "gray55")) +
  scale_linetype_manual(name = "Countries",
                        values = c("Average" = "solid", "USA" = "dashed",
                                   "China" = "dotdash",
                                   "Iran" = "dotted")) +
  scale_size_manual(values = 1.5) +
  theme_bw() +
  theme(legend.position = "bottom", legend.key.width = unit(2, "cm")) +
  labs(x = "Year", y = "Factor 2: WMD Dimension") +
  coord_cartesian(xlim = c(1993, 2019), ylim = c(-2, 2)) +
  scale_x_continuous(breaks = c(1995, 2000, 2005, 2010, 2015))

png("figure6.png", res = 600, width = 5000, height = 3500)
f6
dev.off()

# Figure A2: Betas and Exclusivity (Topic 1)
fa2 <- ggplot(allfeatures, aes(bet_top1, V1)) +
  guides(color = "none") +
  xlab("Betas") +
  ylab("Exclusivity") + 
  theme(axis.line.x = element_line(arrow = arrow()),
        axis.line.y = element_line(arrow = arrow()),
        panel.background = element_rect(fill = "white")) +
  geom_text(aes(label = features), color = "dimgray", size = 2, 
            vjust = 0.5, hjust = 0.5, alpha = 0.3) +
  geom_text_repel(data = allfeatures[allfeatures$features == "africa"
                                     | allfeatures$features == "caribbean"
                                     | allfeatures$features == "sahel"
                                     | allfeatures$features == "mercosur"
                                     | allfeatures$features == "latin"
                                     | allfeatures$features == "western"
                                     | allfeatures$features == "atlant"
                                     | allfeatures$features == "middl"
                                     | allfeatures$features == "east"
                                     | allfeatures$features == "caucasus", ],
                  aes(label = features, color = "black"), size = 4) +
  scale_color_manual(values = c("black" = "black", "dimgray" = "dimgray")) +
  coord_cartesian(xlim = c(-2, 1.5))

png("figurea2.png", res = 250, width = 2200, height = 1200)
fa2
dev.off()

# Figure A3: Betas and Exclusivity (Topic 3)
fa3 <- ggplot(allfeatures, aes(bet_top3, V3)) +
  guides(color = "none") +
  xlab("Betas") +
  ylab("Exclusivity") + 
  theme(axis.line.x = element_line(arrow = arrow()),
        axis.line.y = element_line(arrow = arrow()),
        panel.background = element_rect(fill = "white")) +
  geom_text(aes(label = features), color = "dimgray", size = 2, 
            vjust = 0.5, hjust = 0.5, alpha = 0.3) +
  geom_text_repel(data = allfeatures[allfeatures$features == "contamin"
                                     | allfeatures$features == "survivor"
                                     | allfeatures$features == "displac"
                                     | allfeatures$features == "disabl"
                                     | allfeatures$features == "devast"
                                     | allfeatures$features == "occupi"
                                     | allfeatures$features == "troop"
                                     | allfeatures$features == "aggress"
                                     | allfeatures$features == "armament"
                                     | allfeatures$features == "defens"
                                     | allfeatures$features == "reintegr", ],
                  aes(label = features, color = "black"), size = 4) +
  scale_color_manual(values = c("black" = "black", "dimgray" = "dimgray")) +
  coord_cartesian(xlim = c(-1, 2))


png("figurea3.png", res = 250, width = 2200, height = 1200)
fa3
dev.off()

# Figure A4: Betas and Exclusivity (Topic 4)
fa4 <- ggplot(allfeatures, aes(bet_top4, V4)) +
  guides(color = "none") +
  xlab("Betas") +
  ylab("Exclusivity") + 
  theme(axis.line.x = element_line(arrow = arrow()),
        axis.line.y = element_line(arrow = arrow()),
        panel.background = element_rect(fill = "white")) +
  geom_text(aes(label = features), color = "dimgray", size = 2, 
            vjust = 0.5, hjust = 0.5, alpha = 0.3) +
  geom_text_repel(data = allfeatures[allfeatures$features == "fals"
                                     | allfeatures$features == "accus"
                                     | allfeatures$features == "occupi"
                                     | allfeatures$features == "baseless"
                                     | allfeatures$features == "aggress"
                                     | allfeatures$features == "coordin"
                                     | allfeatures$features == "coalit"
                                     | allfeatures$features == "outreach"
                                     | allfeatures$features == "ratifi"
                                     | allfeatures$features =="regulatori", ],
                  aes(label = features, color = "black"), size = 4) +
  scale_color_manual(values = c("black" = "black", "dimgray" = "dimgray")) +
  coord_cartesian(xlim = c(-1, 2))


png("figurea4.png", res = 250, width = 2200, height = 1200)
fa4
dev.off()

# Figure A5: Betas and Exclusivity (Topic 6)
fa5 <- ggplot(allfeatures, aes(bet_top6, V6)) +
  guides(color = "none") +
  xlab("Betas") +
  ylab("Exclusivity") + 
  theme(axis.line.x = element_line(arrow = arrow()),
        axis.line.y = element_line(arrow = arrow()),
        panel.background = element_rect(fill = "white")) +
  geom_text(aes(label = features), color = "dimgray", size = 2, 
            vjust = 0.5, hjust = 0.5, alpha = 0.3) +
  geom_text_repel(data = allfeatures[allfeatures$features == "fmct"
                                     | allfeatures$features == "start"
                                     | allfeatures$features == "steadi"
                                     | allfeatures$features == "realist"
                                     | allfeatures$features == "pragmat"
                                     | allfeatures$features == "nato"
                                     | allfeatures$features == "inalien"
                                     | allfeatures$features == "doubl"
                                     | allfeatures$features == "imbal"
                                     | allfeatures$features == "discrimin"
                                     | allfeatures$features == "pressur", ],
                  aes(label = features, color = "black"), size = 4) +
  scale_color_manual(values = c("black" = "black", "dimgray" = "dimgray")) +
  coord_cartesian(xlim = c(-1.5, 1))


png("figurea5.png", res = 250, width = 2200, height = 1200)
fa5
dev.off()

# Figure A6: Betas and Exclusivity (Topic 7)
fa6 <- ggplot(allfeatures, aes(bet_top7, V7)) +
  guides(color = "none") +
  xlab("Betas") +
  ylab("Exclusivity") + 
  theme(axis.line.x = element_line(arrow = arrow()),
        axis.line.y = element_line(arrow = arrow()),
        panel.background = element_rect(fill = "white")) +
  geom_text(aes(label = features), color = "dimgray", size = 2, 
            vjust = 0.5, hjust = 0.5, alpha = 0.3) +
  geom_text_repel(data = allfeatures[allfeatures$features == "sky"
                                     | allfeatures$features == "orbit"
                                     | allfeatures$features == "satellit"
                                     | allfeatures$features == "nato"
                                     | allfeatures$features == "abm"
                                     | allfeatures$features == "traffic"
                                     | allfeatures$features == "social"
                                     | allfeatures$features == "climat"
                                     | allfeatures$features == "inalien"
                                     | allfeatures$features == "humankind", ],
                  aes(label = features, color = "black"), size = 4) +
  scale_color_manual(values = c("black" = "black", "dimgray" = "dimgray")) +
  coord_cartesian(xlim = c(-1, 1.5))


png("figurea6.png", res = 250, width = 2200, height = 1200)
fa6
dev.off()

# Figure A7: Betas and Exclusivity (Topic 8)
fa7 <- ggplot(allfeatures, aes(bet_top8, V8)) +
  guides(color = "none") +
  xlab("Betas") +
  ylab("Exclusivity") + 
  theme(axis.line.x = element_line(arrow = arrow()),
        axis.line.y = element_line(arrow = arrow()),
        panel.background = element_rect(fill = "white")) +
  geom_text(aes(label = features), color = "dimgray", size = 2, 
            vjust = 0.5, hjust = 0.5, alpha = 0.3) +
  geom_text_repel(data = allfeatures[allfeatures$features == "traffic"
                                     | allfeatures$features == "demobil"
                                     | allfeatures$features == "displac"
                                     | allfeatures$features == "scourg"
                                     | allfeatures$features == "refug"
                                     | allfeatures$features == "violenc"
                                     | allfeatures$features == "sponsor"
                                     | allfeatures$features == "coalit"
                                     | allfeatures$features == "regist"
                                     | allfeatures$features == "streamlin"
                                     | allfeatures$features =="represent"
                                     | allfeatures$features =="discuss", ],
                  aes(label = features, color = "black"), size = 4) +
  scale_color_manual(values = c("black" = "black", "dimgray" = "dimgray")) +
  coord_cartesian(xlim = c(-2, 1.5))


png("figurea7.png", res = 250, width = 2200, height = 1200)
fa7
dev.off()

################################################################################
#PART 6: Correlations
################################################################################

# Country Level
cor.test(allpositions_agg$PC1, allpositions_agg$risse_factor1)
cor.test(allpositions_agg$PC2, allpositions_agg$risse_factor1)

cor.test(allpositions_agg$PC1, allpositions_agg$risse_factor2)
cor.test(allpositions_agg$PC2, allpositions_agg$risse_factor2)

cor.test(allpositions_agg$PC1, allpositions_agg$bailey_all)
cor.test(allpositions_agg$PC2, allpositions_agg$bailey_all)

cor.test(allpositions_agg$PC1, allpositions_agg$bailey_nuc)
cor.test(allpositions_agg$PC2, allpositions_agg$bailey_nuc)

cor.test(allpositions_agg$PC1, allpositions_agg$barnumlo)
cor.test(allpositions_agg$PC2, allpositions_agg$barnumlo)

cor.test(allpositions_agg$PC1, allpositions_agg$efrat)
cor.test(allpositions_agg$PC2, allpositions_agg$efrat)

# Country-Year Level
cor.test(allpositions$PC1, allpositions$risse_factor1)
cor.test(allpositions$PC2, allpositions$risse_factor1)

cor.test(allpositions$PC1, allpositions$risse_factor2)
cor.test(allpositions$PC2, allpositions$risse_factor2)

cor.test(allpositions$PC1, allpositions$bailey_all)
cor.test(allpositions$PC2, allpositions$bailey_all)

cor.test(allpositions$PC1, allpositions$bailey_nuc)
cor.test(allpositions$PC2, allpositions$bailey_nuc)

cor.test(allpositions$PC1, allpositions$barnumlo)
cor.test(allpositions$PC2, allpositions$barnumlo)

cor.test(allpositions$PC1, allpositions$efrat)
cor.test(allpositions$PC2, allpositions$efrat)

################################################################################
#PART 7: Regression
################################################################################
sink("outputtables.txt")
# Arms Transfers + Random Effects
model_armstr_re_1 <- lmer(PC1 ~ log(exports + 1) + log(imports + 1)
                          + (1 | country) + (1 | year), data = allpositions)
summary(model_armstr_re_1)

model_armstr_re_2 <- lmer(PC2 ~ log(exports + 1) + log(imports + 1)
                          + (1 | country) + (1 | year), data = allpositions)
summary(model_armstr_re_2)

# WMD + Random Effects
model_wmd_re_1 <- lmer(PC1 ~ nuc + chem
                       + (1 | country) + (1 | year), data = allpositions)
summary(model_wmd_re_1)

model_wmd_re_2 <- lmer(PC2 ~ nuc + chem
                       + (1 | country) + (1 | year), data = allpositions)
summary(model_wmd_re_2)

# Arms Transfers + Controls + Random Effects
model_armstr_contr_1 <- lmer(PC1 ~ log(exports + 1) + log(imports + 1)
                             + v2x_polyarchy + log(gdppc) + log(cinc)
                             + armedconflict + region
                             + (1 | country) + (1 | year), data = allpositions)
summary(model_armstr_contr_1)

model_armstr_contr_2 <- lmer(PC2 ~ log(exports + 1) + log(imports + 1)
                             + v2x_polyarchy + log(gdppc) + log(cinc)
                             + armedconflict + region
                             + (1 | country) + (1 | year), data = allpositions)
summary(model_armstr_contr_2)

# WMD + Controls + Random Effects
model_wmd_contr_1 <- lmer(PC1 ~ nuc + chem
                          + v2x_polyarchy + log(gdppc) + log(cinc)
                          + armedconflict + region
                          + (1 | country) + (1 | year), data = allpositions)
summary(model_wmd_contr_1)

model_wmd_contr_2 <- lmer(PC2 ~ nuc + chem
                          + v2x_polyarchy + log(gdppc) + log(cinc)
                          + armedconflict + region
                          + (1 | country) + (1 | year), data = allpositions)
summary(model_wmd_contr_2)

# Arms Transfers + WMD + Controls + Random Effects
model_all_1 <- lmer(PC1 ~ log(exports + 1) + log(imports + 1) + nuc + chem
                    + v2x_polyarchy + log(gdppc) + log(cinc)
                    + armedconflict + region
                    + (1 | country) + (1 | year), data = allpositions)
summary(model_all_1)

model_all_2 <- lmer(PC2 ~ log(exports + 1) + log(imports + 1) + nuc + chem
                    + v2x_polyarchy + log(gdppc) + log(cinc)
                    + armedconflict + region
                    + (1 | country) + (1 | year), data = allpositions)
summary(model_all_2)

sink()

sink()
